library(Zelig)
library(MatchIt)
library(survival)
library(Hmisc)

dat <- as.data.frame(read.csv("CongoPrefects.csv", sep=","))


#################################################
#		CONGO: TABLE 1 DESCIPTIVE STATS			#
#################################################

length(unique(dat$Portfolio))
length(unique(dat$LastName))
nrow(dat)

mean(na.omit(dat$NonNative)); sd(na.omit(dat$NonNative))
mean(na.omit(dat$TenurePeriodStop)); sd(na.omit(dat$TenurePeriodStop))
mean(na.omit(dat$AppointedRegionHostile)); sd(na.omit(dat$AppointedRegionHostile))
mean(na.omit(log(dat$AppointedRegionPopulation2007))); sd(na.omit(log(dat$AppointedRegionPopulation2007)))
mean(na.omit(log(dat$AppointedRegionArea))); sd(na.omit(log(dat$AppointedRegionArea)))
mean(na.omit(dat$CivilWarPost)); sd(na.omit(dat$CivilWarPost))



#################################################
#			CONGO: TABLE 2 POSTING MODELS		#
#################################################

dat$log.population <- log(dat$AppointedRegionPopulation2007)
dat$log.area <- log(dat$AppointedRegionArea)

model1 <- zelig(NonNative ~ AppointedRegionHostile + CivilWarPost + log.population + log.area, data=dat, model="logit")
summary(model1)

x0 <- setx(model1, AppointedRegionHostile=0, CivilWarPost=0, log.population=mean(dat$log.population, na.rm=T), log.area=mean(na.omit(dat$log.area)))
s0 <- sim(model1, x=x0, num=10000); summary(s0)
x1 <- setx(model1, AppointedRegionHostile=1, CivilWarPost=0, log.population=mean(dat$log.population, na.rm=T), log.area=mean(na.omit(dat$log.area)))
s1 <- sim(model1, x=x1, num=10000); summary(s1)


model2 <- zelig(NonNative ~ AppointedRegionHostile + CivilWarPost + log.population + log.area + NonNative.l1, data=dat, model="logit")
summary(model2)
x0 <- setx(model2, AppointedRegionHostile=0, CivilWarPost=0, log.population=mean(dat$log.population, na.rm=T), log.area=mean(na.omit(dat$log.area)), NonNative.l1=0)
s0 <- sim(model2, x=x0, num=10000); summary(s0)
x1 <- setx(model2, AppointedRegionHostile=1, CivilWarPost=0, log.population=mean(dat$log.population, na.rm=T), log.area=mean(na.omit(dat$log.area)), NonNative.l1=0)
s1 <- sim(model2, x=x1, num=10000); summary(s1)


#################################################
#		CONGO: TABLE 3 TENURE MODELS			#
#################################################

model3 <- coxph(Surv(TenurePeriodStart, TenurePeriodStop, TenureStop) ~ AppointedRegionHostile + CivilWarPost + NonNative + log.population + log.area, data=dat, method="efron")
summary(model3)

model4 <- coxph(Surv(TenurePeriodStart, TenurePeriodStop, TenureStop) ~ AppointedRegionHostile + CivilWarPost + NonNative + log.population + log.area + frailty(Portfolio, distribution="gaussian"), data=dat, method="efron")
summary(model4)


#################################################
#		CONGO: FIGURE 5 SURVIVAL PLOT			#
#################################################

newdat <- data.frame(AppointedRegionHostile=c(0,1), CivilWarPost=c(0,0), log.population=rep(mean(na.omit(dat$log.population)),2), log.area=rep(mean(na.omit(dat$log.area)),2), NonNative=c(0,0))
par(mar=c(4,4,0,1), bg=NA)
plot(survfit(model3, newdata=newdat), lwd=5, lty=c(1,3), xlab="", ylab="", main="", xlim=c(0,12), ylim=c(0,1), axes=F, col=c("Black"))
axis(1, at=seq(from=0, to=12, by=2), labels=seq(from=0, to=12, by=2), las=1, tick=T, cex.axis=1.2, col="Black", col.ticks="Black", col.axis="Black")
mtext("Years Since Appointment by Sassou Nguesso", 1, line=3, col="Black", cex=1.3)
axis(2, cex.axis=1.2, col="Black", col.ticks="Black", col.axis="Black", las=2)
mtext("Proportion of Executives Not Terminated", 2, line=3, col="Black", cex=1.3)
text(8, 0.95, "Sassou Nguesso's Core Regions", cex=1.2)
text(6.5, 0.4, "Regions Outside Sassou Nguesso's Core", cex=1.2)
dev.print(file="CongoTenureHazard.pdf",device=pdf)


#################################################
#				CONGO: SI TABLE 2				#
#################################################

model1 <- zelig(CrossCleavage ~ AppointedRegionHostile + CivilWarPost + log(AppointedRegionPopulation2007) + log(AppointedRegionArea) , data=dat, model="logit")
summary(model1)

model2 <- zelig(CrossCleavage ~ AppointedRegionHostile + CivilWarPost + log(AppointedRegionPopulation2007) + log(AppointedRegionArea) + CrossCleavage.l1, data=dat, model="logit")
summary(model2)


#################################################
#				CONGO: SI TABLE 3				#
#################################################

model3 <- coxph(Surv(TenurePeriodStart, TenurePeriodStop, TenureStop) ~ AppointedRegionHostile + CivilWarPost + CrossCleavage + log(AppointedRegionPopulation2007) + log(AppointedRegionArea) , data=dat, method="efron")
summary(model3)

model4 <- coxph(Surv(TenurePeriodStart, TenurePeriodStop, TenureStop) ~ AppointedRegionHostile + CivilWarPost + CrossCleavage + log(AppointedRegionPopulation2007) + log(AppointedRegionArea) + frailty(Portfolio, distribution="gaussian"), data=dat, method="efron")
summary(model4)


#################################################
#				CONGO: SI TABLE 7				#
#################################################

model1 <- zelig(NonNative ~ AppointedRegionHostile*acledCount + CivilWarPost + log(AppointedRegionPopulation2007) + log(AppointedRegionArea), data=dat[dat$Year>=1998,], model="logit")
summary(model1)

model2 <- zelig(NonNative ~ AppointedRegionHostile*acledCount + CivilWarPost + log(AppointedRegionPopulation2007) + log(AppointedRegionArea) + NonNative.l1, data=dat[dat$Year>=1998,], model="logit")
summary(model2)


#################################################
#				CONGO: SI TABLE 8				#
#################################################

model1 <- coxph(Surv(TenurePeriodStart, TenurePeriodStop, TenureStop) ~ AppointedRegionHostile*acledCount + CivilWarPost + NonNative + log(AppointedRegionPopulation2007) + log(AppointedRegionArea), data=dat[dat$Year>1998,], method="efron")
summary(model1)

model2 <- coxph(Surv(TenurePeriodStart, TenurePeriodStop, TenureStop) ~ AppointedRegionHostile*acledCount + CivilWarPost + NonNative + log(AppointedRegionPopulation2007) + log(AppointedRegionArea) + frailty(Portfolio, distribution="gaussian"), data=dat[dat$Year>=1998,], method="efron")
summary(model2)



#################################################
#				CONGO: SI TABLE XX SCAD				#
#################################################

model1 <- zelig(NonNative ~ AppointedRegionHostile*scadCount + CivilWarPost + log(AppointedRegionPopulation2007) + log(AppointedRegionArea) + acledCount, data=dat[dat$Year>=1998,], model="logit")
summary(model1)

model2 <- zelig(NonNative ~ AppointedRegionHostile*scadCount + CivilWarPost + log(AppointedRegionPopulation2007) + log(AppointedRegionArea) + NonNative.l1, data=dat[dat$Year>=1998,], model="logit")
summary(model2)


#################################################
#				CONGO: SI TABLE XX SCAD				#
#################################################

model1 <- coxph(Surv(TenurePeriodStart, TenurePeriodStop, TenureStop) ~ AppointedRegionHostile*scadCount + CivilWarPost + NonNative + log(AppointedRegionPopulation2007) + log(AppointedRegionArea), data=dat[dat$Year>1998,], method="efron")
summary(model1)

model2 <- coxph(Surv(TenurePeriodStart, TenurePeriodStop, TenureStop) ~ AppointedRegionHostile*scadCount + CivilWarPost + NonNative + log(AppointedRegionPopulation2007) + log(AppointedRegionArea) + frailty(Portfolio, distribution="gaussian"), data=dat[dat$Year>=1998,], method="efron")
summary(model2)





#####################################################################
#			Randomization Inference -- Data preparation				#
#####################################################################

#setwd("/Users/blcarter3/Dropbox/Congo Data/")
dat <- as.data.frame(read.csv("CongoPrefects.csv", sep=","))
elites <- as.data.frame(read.csv("CongoElites.csv", sep=","))

cset1 <- as.factor(dat$PoBDepartment[dat$PoBDepartment!=""])							#Counterfactual set: Appointed prefects
cset2 <- as.factor(elites$PoBDepartment[elites$Minister==1&elites$PoBDepartment!=""])	#Counterfactual set: Ministers
cset3 <- as.factor(na.omit(elites$PoBDepartment[elites$Dsn2AppointmentHighLevel>=1&elites$PoBDepartment!=""]))		#Counterfactual set: High level
cset1l <- dat$CivilWarPost[dat$PoBDepartment!=""]							#Counterfactual set: Appointed prefects
cset2l <- elites$CivilWarPost[elites$Minister==1&elites$PoBDepartment!=""]	#Counterfactual set: Ministers
cset3l <- na.omit(elites$CivilWarPost[elites$Dsn2AppointmentHighLevel>=1&elites$PoBDepartment!=""])

set.seed(12345)

#####################################################################
#		Randomization Inference -- Prefects as counterfactual pool	#
#####################################################################

cset <- cset1
csetl <- cset1l
sims <- 1000
coefs.p <- matrix(nrow=sims, ncol = 2)		#matrix of blank coefficients

for (i in 1:sims){
if((i %% 100==0)==T){print(paste('On',i,'of',sims,'rows,',round((i/sims)*100,2),'percent'))}
	x <- sample(cset, nrow(dat), replace=T)				#Sampling, with replacement, regions of 202 prefects (ataset is 202 rows)
	mat <- matrix(nrow=nrow(dat), ncol=8) 				#Matrix "identical" to actual data but switch PoBRegions based on draws
	mat <- as.data.frame(mat)
	
	mat[,1] <- x 										#or x_withtenurelength if we do tenure
	mat[,2] <- dat$Portfolio 							#Taking Portfolio names from actual data
	mat[,3] <- dat$AppointedRegion 						#Recording region of appointment
	for(j in 1:nrow(mat)){
		mat[j,4] <- ifelse(as.character(mat$V1[j])==as.character(mat$V3[j]), 1, 0)	#Coding whether there is a "match"
	}
	mat[,4] <- (1 - mat$V4)								#Coding whether there is no "match"
	mat[,5] <- sample(csetl, nrow(dat), replace=T)		#Sampling loyalty variable. Must be T when cset1 because of sample size
	mat[,6] <- dat$AppointedRegionPopulation2007
	mat[,7] <- dat$AppointedRegionHostile
	mat[,8] <- dat$AppointedRegionArea

	colnames(mat) <- c("RandomOfficer", "Portfolio", "AppointedRegion", "NonNative", "CivilWarPost", "AppointedRegionPopulation2007", "AppointedRegionHostile", "AppointedRegionArea")

	ri <- glm(NonNative ~ CivilWarPost + AppointedRegionPopulation2007 + AppointedRegionHostile + AppointedRegionArea, family="binomial", dat=mat)		#Randomization inference model for new dataset
	summary(ri)
#	cl.ri <- summary(ri)$coefficients[,2]				#getting SEs

	coefs.p[i,1] <- summary(ri)$coefficients[4,1] 			#recording coefficient on explanatory variable
	coefs.p[i,2] <- summary(ri)$coefficients[4,4] 		#recording SEs on main explanatory variable
}

hist(coefs.p[,1], xlim=c(-3, 8), breaks = 200, main = "Randomization Inference for Sassou Nguesso's Congo
 (using Prefects)", xlab = "Coefficient Values on Main Result, 1000 simulations")
abline(v=6.69, col="Red", lty=2)						#Coefficient from main tables
dnorm(6.69, mean=mean(coefs.p[,1]),sd=sd(coefs.p[,1])) 		#p value


#####################################################################
#		Randomization Inference -- Ministers as counterfactual pool	#
#####################################################################

cset <- cset2
csetl <- cset2l
sims <- 1000
coefs.m <- matrix(nrow=sims, ncol = 2)		#matrix of blank coefficients

for (i in 1:sims){
if((i %% 100==0)==T){print(paste('On',i,'of',sims,'rows,',round((i/sims)*100,2),'percent'))}
	x <- sample(cset, nrow(dat), replace=T)				#Sampling, with replacement, regions of 202 prefects (ataset is 202 rows)
	mat <- matrix(nrow=nrow(dat), ncol=8) 				#Matrix "identical" to actual data but switch PoBRegions based on draws
	mat <- as.data.frame(mat)
	
	mat[,1] <- x 										#or x_withtenurelength if we do tenure
	mat[,2] <- dat$Portfolio 							#Taking Portfolio names from actual data
	mat[,3] <- dat$AppointedRegion 						#Recording region of appointment
	for(j in 1:nrow(mat)){
		mat[j,4] <- ifelse(as.character(mat$V1[j])==as.character(mat$V3[j]), 1, 0)	#Coding whether there is a "match"
	}
	mat[,4] <- (1 - mat$V4)								#Coding whether there is no "match"
	mat[,5] <- sample(csetl, nrow(dat), replace=T)		#Sampling loyalty variable. Must be T when cset1 because of sample size
	mat[,6] <- dat$AppointedRegionPopulation2007
	mat[,7] <- dat$AppointedRegionHostile
	mat[,8] <- dat$AppointedRegionArea

	colnames(mat) <- c("RandomOfficer", "Portfolio", "AppointedRegion", "NonNative", "CivilWarPost", "AppointedRegionPopulation2007", "AppointedRegionHostile", "AppointedRegionArea")

	ri <- glm(NonNative ~ CivilWarPost + AppointedRegionPopulation2007 + AppointedRegionHostile + AppointedRegionArea, family="binomial", dat=mat)		#Randomization inference model for new dataset
	summary(ri)
#	cl.ri <- summary(ri)$coefficients[,2]				#getting SEs

	coefs.m[i,1] <- summary(ri)$coefficients[4,1] 			#recording coefficient on explanatory variable
	coefs.m[i,2] <- summary(ri)$coefficients[4,4] 		#recording SEs on main explanatory variable
}

hist(coefs.m[,1], xlim=c(-3, 8), breaks = 200, main = "Randomization Inference for Sassou Nguesso's Congo
 (using Ministers)", xlab = "Coefficient Values on Main Result, 1000 simulations")
abline(v=6.69, col="Red", lty=2)						#Coefficient from main tables
dnorm(6.69, mean=mean(coefs.m[,1]),sd=sd(coefs.m[,1])) 		#p value


#####################################################################
#		Randomization Inference -- Elites as counterfactual pool	#
#####################################################################

cset <- cset3
csetl <- cset3l
sims <- 1000
coefs.e <- matrix(nrow=sims, ncol = 2)		#matrix of blank coefficients

for (i in 1:sims){
if((i %% 100==0)==T){print(paste('On',i,'of',sims,'rows,',round((i/sims)*100,2),'percent'))}
	x <- sample(cset, nrow(dat), replace=T)				#Sampling, with replacement, regions of 202 prefects (ataset is 202 rows)
	mat <- matrix(nrow=nrow(dat), ncol=8) 				#Matrix "identical" to actual data but switch PoBRegions based on draws
	mat <- as.data.frame(mat)
	
	mat[,1] <- x 										#or x_withtenurelength if we do tenure
	mat[,2] <- dat$Portfolio 							#Taking Portfolio names from actual data
	mat[,3] <- dat$AppointedRegion 						#Recording region of appointment
	for(j in 1:nrow(mat)){
		mat[j,4] <- ifelse(as.character(mat$V1[j])==as.character(mat$V3[j]), 1, 0)	#Coding whether there is a "match"
	}
	mat[,4] <- (1 - mat$V4)								#Coding whether there is no "match"
	mat[,5] <- sample(csetl, nrow(dat), replace=T)		#Sampling loyalty variable. Must be T when cset1 because of sample size
	mat[,6] <- dat$AppointedRegionPopulation2007
	mat[,7] <- dat$AppointedRegionHostile
	mat[,8] <- dat$AppointedRegionArea

	colnames(mat) <- c("RandomOfficer", "Portfolio", "AppointedRegion", "NonNative", "CivilWarPost", "AppointedRegionPopulation2007", "AppointedRegionHostile", "AppointedRegionArea")

	ri <- glm(NonNative ~ CivilWarPost + AppointedRegionPopulation2007 + AppointedRegionHostile + AppointedRegionArea, family="binomial", dat=mat)		#Randomization inference model for new dataset
	summary(ri)
#	cl.ri <- summary(ri)$coefficients[,2]				#getting SEs

	coefs.e[i,1] <- summary(ri)$coefficients[4,1] 			#recording coefficient on explanatory variable
	coefs.e[i,2] <- summary(ri)$coefficients[4,4] 		#recording SEs on main explanatory variable
}

hist(coefs.e[,1], xlim=c(-3, 8), breaks = 200, main = "Randomization Inference for Sassou Nguesso's Congo
 (using High Level Appointees)", xlab = "Coefficient Values on Main Result, 1000 simulations")
abline(v=6.69, col="Red", lty=2)						#Coefficient from main tables
dnorm(6.69, mean=mean(coefs.e[,1]),sd=sd(coefs.e[,1])) 		#p value


#####################################################################
#					Randomization Inference -- Figure				#
#####################################################################

par = c(4, 2, 10, 2)
par(mfrow=c(2, 2))

hist(coefs.p[,1], xlim=c(-3, 8), ylim=c(0,90), breaks = 200, main="Congo: Sassou Nguesso \n (using all prefects)", xlab = "Coefficient Values on Main Result, 1000 simulations")
abline(v=6.69, col="Red", lty=2)						#Coefficient from main tables
dnorm(6.69, mean=mean(coefs.p[,1]),sd=sd(coefs.p[,1])) 		#p value

hist(coefs.m[,1], xlim=c(-3, 8), ylim=c(0,90), breaks = 50, main="Congo: Sassou Nguesso \n (using all ministers)", xlab = "Coefficient Values on Main Result, 1000 simulations")
abline(v=6.69, col="Red", lty=2)						#Coefficient from main tables
dnorm(6.69, mean=mean(coefs.m[,1]),sd=sd(coefs.m[,1])) 		#p value

hist(coefs.e[,1], xlim=c(-3, 8), ylim=c(0,90), breaks = 200, main="Congo: Sassou Nguesso \n (using major/mid level appointees)", xlab = "Coefficient Values on Main Result, 1000 simulations")
abline(v=6.69, col="Red", lty=2)						#Coefficient from main tables
dnorm(6.69, mean=mean(coefs.e[,1]),sd=sd(coefs.e[,1])) 		#p value

setwd("/Users/blcarter3/Dropbox/CarterHassan/Appendix")
dev.print(file="Randomization_Inference_Congo.pdf",device=pdf)
